// USPVSEthnicity
// Session setup: open the log and load the cleaned PVS data.
// Close any log left open by an earlier (possibly failed) run; otherwise
// -log using- stops with "log file already open".
capture log close
log using USPVSEthnicity.log, replace

// Data source: Prosser, Magasin, Proulx and Haddock, UK Progressive Values Dataset, April 2024
// Accessed on March 16 2025
// -clear- lets the do-file be re-run even when a changed dataset is already in memory.
// NOTE(review): the path is machine-specific; adjust it when running elsewhere.
use "C:\Users\sbstjp\OneDrive - Cardiff University\PVScleandata.dta", clear

// Base weight: every respondent starts at 1 before raking
generate weight = 1

* Collapse the raw demographics into the categories used for raking

// Age bands; recode attaches the value labels given in each rule.
// NOTE(review): the lowest band is labelled "0-24" but min/24 simply takes
// every age up to 24 - confirm the sample's minimum age.
recode age (min/24 = 1 "0-24")  ///
           (25/34  = 2 "25-34") ///
           (35/44  = 3 "35-44") ///
           (45/54  = 4 "45-54") ///
           (55/max = 5 "55+"), generate(age_group)

// Ethnicity: source codes 5, 6 and 7 are pooled into group 4; source code 4
// becomes group 5 and source code 8 becomes group 6.
recode ethnicity (1 = 1) (2 = 2) (3 = 3) (5 6 7 = 4) (4 = 5) (8 = 6), generate(ethnic_group)
label define ethnic_group_lbl ///
    1 "White" ///
    2 "Black" ///
    3 "Hispanic" ///
    4 "Asian or Native Hawaiian/other Pacific Islander, non-Hispanic" ///
    5 "Native American/Alaska Native or other race, non-Hispanic" ///
    6 "Multiple races, non-Hispanic"
label values ethnic_group ethnic_group_lbl

// Education: four bands built from source codes 3-11.
// NOTE(review): recode leaves any value not named in a rule unchanged, so an
// education or income code outside the listed ranges would pass straight
// through into the grouped variable - confirm none exist.
recode education (3 4 5 = 1) (6 7 = 2) (8 = 3) (9 10 11 = 4), generate(ed_group)

// Household income: four bands built from source codes 1-13
recode householdincome (1 2 3 4 5 6 7 = 1) (8 9 10 11 = 2) (12 = 3) (13 = 4), generate(inc_group)

* Control totals for the raking step. Per the original author's note, the
* shares come from the ANES24 pre-election wave (chosen because, unlike census
* data, it carries political self-identification) and cover points 1-4 of that
* scale, i.e. very liberal to centrist, which mirrors the PVS sample.
*
* Each *tot variable holds, for every respondent, the target share of the
* category that respondent falls in; it stays missing when the category is
* missing or has no share listed.

// Gender: codes 3-5 are set to missing, leaving the two weighted categories
replace gender = . if gender >= 3 & gender <= 5
rename gender FemaleGender

// sextot - categories 1-2: Male, Female
generate sextot = .
local sex_shares 0.4663 0.5337
forvalues k = 1/2 {
    local share : word `k' of `sex_shares'
    replace sextot = `share' if FemaleGender == `k'
}

// agetot - categories 1-5: 18-24, 25-34, 35-44, 45-54, 55+
generate agetot = .
local age_shares 0.1227 0.1848 0.1891 0.1544 0.3491
forvalues k = 1/5 {
    local share : word `k' of `age_shares'
    replace agetot = `share' if age_group == `k'
}

// ethtot - categories 1-6: White, Black, Hispanic, Asian or Native Hawaiian,
// Native American, Multiple
generate ethtot = .
local eth_shares 0.6038 0.1186 0.1592 0.0659 0.0024 0.0502
forvalues k = 1/6 {
    local share : word `k' of `eth_shares'
    replace ethtot = `share' if ethnic_group == `k'
}

// edtot - categories 1-4: up to some college, trade/associate degree,
// undergraduate degree, postgraduate and above
generate edtot = .
local ed_shares 0.4408 0.0926 0.2754 0.1912
forvalues k = 1/4 {
    local share : word `k' of `ed_shares'
    replace edtot = `share' if ed_group == `k'
}

// inctot - categories 1-4: under 60k, 60-100k, 100-150k, over 150k
generate inctot = .
local inc_shares 0.2528 0.2084 0.1966 0.3422
forvalues k = 1/4 {
    local share : word `k' of `inc_shares'
    replace inctot = `share' if inc_group == `k'
}

* Rake the base weight to the five demographic margins with -survwgt rake-
* (from the survwgt package); the raked weight is written to rakedweight.
* NOTE(review): the totvars hold shares rather than population counts, and
* some respondents have missing margin categories - confirm survwgt treats
* both as intended.
survwgt rake weight, ///
    by(FemaleGender age_group ethnic_group ed_group inc_group) ///
    totvars(sextot agetot ethtot edtot inctot) ///
    generate(rakedweight)

// Demographics
* Indicator variables. Each one is built in a single statement: the logical
* expression yields 1/0, and the -if- qualifier restricts assignment to the
* codes the indicator is defined for, leaving everything else missing.

// Graduate: 1 for education codes 8-11, 0 for codes 3-7
generate Graduate = inrange(education, 8, 11) ///
    if inrange(education, 3, 7) | inrange(education, 8, 11)

// White: group 1 versus groups 2-6
generate White = (ethnic_group == 1) ///
    if ethnic_group == 1 | inrange(ethnic_group, 2, 6)

// Black: group 2 versus group 1 and groups 3-6
generate Black = (ethnic_group == 2) ///
    if inlist(ethnic_group, 1, 2) | inrange(ethnic_group, 3, 6)

// Hispanic: group 3 versus groups 1-2 and 4-6
generate Hispanic = (ethnic_group == 3) ///
    if inlist(ethnic_group, 1, 2, 3) | inrange(ethnic_group, 4, 6)

// Asian: group 4 versus groups 1-3 and 5-6
generate Asian = (ethnic_group == 4) ///
    if inrange(ethnic_group, 1, 3) | inlist(ethnic_group, 4, 5, 6)

// Multiracial: group 6 versus groups 1-5
generate Multiracial = (ethnic_group == 6) ///
    if ethnic_group == 6 | inrange(ethnic_group, 1, 5)

// Standardised covariates (egen's std() rescales to mean 0, sd 1)
egen Income = std(householdincome)
egen Age = std(age)

* Dependent variable: map pvs linearly so that 1 stays 1 and 7 becomes 2,
* putting it on the same 1-2 scale as the other dependent variables in the book.
generate PVS = 1 + (pvs - 1) / (7 - 1)

// Regressions and correlations
* One weighted regression of PVS per ethnicity indicator, each with the same
* controls (Age, FemaleGender, Graduate, Income), stored with -eststo- and
* shown side by side with -esttab- (both from the estout package).

// Drop estimates stored earlier in this session. Unnamed -eststo- calls keep
// counting up (est1, est2, ...), so without this a re-run would make -esttab-
// print the stale models next to the new ones.
eststo clear

foreach grp in White Black Hispanic Asian Multiracial {
    // pweights with robust standard errors, as in the original specification
    regress PVS Age FemaleGender Graduate Income `grp' [pweight = rakedweight], vce(robust)
    eststo
}
esttab

// Weighted pairwise correlations between PVS and the ethnicity indicators,
// with significance levels
pwcorr PVS White Black Hispanic Asian Multiracial [aweight = rakedweight], sig

log close
